//
// Created by wserver on 2020/7/29.
//

#include <stdio.h>

// Debug kernel: every thread loads the int stored at `ret`, passes it through
// a warp shuffle-up by one lane, and prints the value it receives.
//
// ret  - pointer dereferenced by every thread; only the first int is read.
// a, b - accepted for the signature but not used by the body.
//
// NOTE(review): the participation mask names all 32 lanes of the warp. A
// launch with fewer than 32 threads in a warp relies on the missing lanes
// being treated as exited -- confirm this is acceptable for the intended
// launch configurations.
__global__ void AplusB(int *ret, int a, int b) {
  const unsigned kFullWarpMask = 0xFFFFFFFF;  // all 32 lanes participate
  const unsigned kLaneDelta = 1;              // take the value from lane - 1
  const int kShuffleWidth = 32;               // shuffle across the whole warp

  // All threads read the same address, so every lane contributes the same value.
  const int loaded = *ret;
  const int received =
      __shfl_up_sync(kFullWarpMask, loaded, kLaneDelta, kShuffleWidth);

  printf("%d \n", received);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool reportCudaStatus(cudaError_t status, const char *what) {
  if (status == cudaSuccess) {
    return true;
  }
  fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
  return false;
}

// Allocates a zero-initialised device buffer of 1000 ints, launches AplusB on
// one block of 10 threads, waits for the kernel to finish so its printf output
// is flushed, then releases the buffer.
//
// Returns 0 on success and 1 if any CUDA call or the kernel itself failed.
int main() {
  const size_t kCount = 1000;
  const size_t kBytes = kCount * sizeof(int);

  int *ret = NULL;
  if (!reportCudaStatus(cudaMalloc(&ret, kBytes), "cudaMalloc")) {
    return 1;
  }

  // cudaMalloc does not initialise memory; zero it so the kernel reads (and
  // prints) a defined value instead of whatever happened to be in the buffer.
  bool ok = reportCudaStatus(cudaMemset(ret, 0, kBytes), "cudaMemset");

  if (ok) {
    AplusB<<<1, 10>>>(ret, 10, 100);
    // A kernel launch returns nothing; configuration errors surface here.
    ok = reportCudaStatus(cudaGetLastError(), "AplusB launch");
  }

  if (ok) {
    // Device-side printf output is only flushed at synchronisation points, and
    // in-kernel faults are only reported once the kernel has completed.
    ok = reportCudaStatus(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
  }

  // Always attempt the free so the allocation is not leaked on error paths.
  ok = reportCudaStatus(cudaFree(ret), "cudaFree") && ok;

  return ok ? 0 : 1;
}